From f380cc749bc55756de8d31dead2150efcd71e9d1 Mon Sep 17 00:00:00 2001 From: tsteven4 <13596209+tsteven4@users.noreply.github.com> Date: Mon, 27 Jan 2020 14:09:13 -0700 Subject: [PATCH] Delete unused cet bits. (#469) Try to consistently use Latin1 for garmin format. --- CMakeLists.txt | 3 - GPSBabel.pro | 3 - Makefile.in | 14 +- cet.cc | 263 +--------------------- cet.h | 37 ---- cet/ansi_x3_4_1968.h | 505 ------------------------------------------- cet/cp1252.h | 210 ------------------ cet/iso_8859_8.h | 156 ------------- cet_util.cc | 258 +--------------------- cet_util.h | 21 -- defs.h | 2 - garmin.cc | 23 +- main.cc | 5 +- 13 files changed, 21 insertions(+), 1479 deletions(-) delete mode 100644 cet/ansi_x3_4_1968.h delete mode 100644 cet/cp1252.h delete mode 100644 cet/iso_8859_8.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a34bbaaf..f2e2bb719 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -104,9 +104,6 @@ set(SUPPORT set(HEADERS an1sym.h cet.h - cet/ansi_x3_4_1968.h - cet/cp1252.h - cet/iso_8859_8.h cet_util.h csv_util.h defs.h diff --git a/GPSBabel.pro b/GPSBabel.pro index 89ed1eb3d..7a31a0231 100644 --- a/GPSBabel.pro +++ b/GPSBabel.pro @@ -92,9 +92,6 @@ SUPPORT = route.cc waypt.cc filter_vecs.cc util.cc vecs.cc mkshort.cc \ HEADERS = \ an1sym.h \ cet.h \ - cet/ansi_x3_4_1968.h \ - cet/cp1252.h \ - cet/iso_8859_8.h \ cet_util.h \ csv_util.h \ defs.h \ diff --git a/Makefile.in b/Makefile.in index 90e2fd249..a9b85af05 100644 --- a/Makefile.in +++ b/Makefile.in @@ -451,8 +451,7 @@ cet.o: cet.cc defs.h config.h zlib/zlib.h zlib/zconf.h cet.h inifile.h \ gbfile.h session.h src/core/datetime.h src/core/optional.h cet_util.o: cet_util.cc defs.h config.h zlib/zlib.h zlib/zconf.h cet.h \ inifile.h gbfile.h session.h src/core/datetime.h src/core/optional.h \ - cet_util.h src/core/logging.h cet/ansi_x3_4_1968.h cet/cp1252.h \ - cet/iso_8859_8.h + cet_util.h src/core/logging.h compegps.o: compegps.cc defs.h config.h zlib/zlib.h zlib/zconf.h cet.h \ 
inifile.h gbfile.h session.h src/core/datetime.h src/core/optional.h \ cet_util.h csv_util.h jeeps/gpsmath.h jeeps/gpsport.h @@ -465,7 +464,7 @@ delgpl.o: delgpl.cc defs.h config.h zlib/zlib.h zlib/zconf.h cet.h \ inifile.h gbfile.h session.h src/core/datetime.h src/core/optional.h destinator.o: destinator.cc defs.h config.h zlib/zlib.h zlib/zconf.h \ cet.h inifile.h gbfile.h session.h src/core/datetime.h \ - src/core/optional.h cet_util.h garmin_fs.h jeeps/gps.h jeeps/../defs.h \ + src/core/optional.h garmin_fs.h jeeps/gps.h jeeps/../defs.h \ jeeps/gpsport.h jeeps/gpsdevice.h jeeps/gpssend.h jeeps/gpsread.h \ jeeps/gpsutil.h jeeps/gpsapp.h jeeps/gpsprot.h jeeps/gpscom.h \ jeeps/gpsfmt.h jeeps/gpsmath.h jeeps/gpsmem.h jeeps/gpsrqst.h \ @@ -547,8 +546,8 @@ garmin_gpi.o: garmin_gpi.cc defs.h config.h zlib/zlib.h zlib/zconf.h \ jeeps/gpsrqst.h garmin_tables.o: garmin_tables.cc defs.h config.h zlib/zlib.h \ zlib/zconf.h cet.h inifile.h gbfile.h session.h src/core/datetime.h \ - src/core/optional.h garmin_tables.h garmin_icon_tables.h \ - jeeps/gpsmath.h jeeps/gpsport.h src/core/logging.h + src/core/optional.h garmin_tables.h jeeps/gpsmath.h jeeps/gpsport.h \ + src/core/logging.h garmin_icon_tables.h garmin_txt.o: garmin_txt.cc defs.h config.h zlib/zlib.h zlib/zconf.h \ cet.h inifile.h gbfile.h session.h src/core/datetime.h \ src/core/optional.h csv_util.h garmin_fs.h jeeps/gps.h jeeps/../defs.h \ @@ -650,7 +649,7 @@ ignrando.o: ignrando.cc defs.h config.h zlib/zlib.h zlib/zconf.h cet.h \ inifile.h gbfile.h session.h src/core/datetime.h src/core/optional.h \ xmlgeneric.h igo8.o: igo8.cc defs.h config.h zlib/zlib.h zlib/zconf.h cet.h inifile.h \ - gbfile.h session.h src/core/datetime.h src/core/optional.h cet_util.h + gbfile.h session.h src/core/datetime.h src/core/optional.h ik3d.o: ik3d.cc defs.h config.h zlib/zlib.h zlib/zconf.h cet.h inifile.h \ gbfile.h session.h src/core/datetime.h src/core/optional.h \ xmlgeneric.h @@ -822,8 +821,7 @@ mapsource.o: mapsource.cc 
defs.h config.h zlib/zlib.h zlib/zconf.h cet.h \ inifile.h gbfile.h session.h src/core/datetime.h src/core/optional.h \ garmin_tables.h jeeps/gpsmath.h jeeps/gpsport.h mkshort.o: mkshort.cc defs.h config.h zlib/zlib.h zlib/zconf.h cet.h \ - inifile.h gbfile.h session.h src/core/datetime.h src/core/optional.h \ - cet_util.h + inifile.h gbfile.h session.h src/core/datetime.h src/core/optional.h mmo.o: mmo.cc defs.h config.h zlib/zlib.h zlib/zconf.h cet.h inifile.h \ gbfile.h session.h src/core/datetime.h src/core/optional.h mtk_locus.o: mtk_locus.cc defs.h config.h zlib/zlib.h zlib/zconf.h cet.h \ diff --git a/cet.cc b/cet.cc index 93398a929..5eefa5ef9 100644 --- a/cet.cc +++ b/cet.cc @@ -19,6 +19,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include <cstring> // for strlen + #include "defs.h" #include "cet.h" @@ -28,33 +30,6 @@ /* %%% single character or value transmission %%% */ /* --------------------------------------------------------------------------- */ -/* %%% cet_char_to_ucs4 %%% - * - * single character to UCS-4 code %%% - * return values: 0 if convertible character, otherwise 1 - */ - -int -cet_char_to_ucs4(const char src, const cet_cs_vec_t* vec, int* value) -{ - int trash; - - int c = ((unsigned char)src & 0xFF); - int* dest = (value != nullptr) ?
value : &trash; - - *dest = c; - c -= vec->ucs4_offset; - - if (c < 0) { - return CET_SUCCESS; - } else if ((c >= vec->ucs4_count) || (vec->ucs4_map[c] == -1)) { - return CET_ERROR; - } else { - *dest = vec->ucs4_map[c]; - return CET_SUCCESS; - } -} - /* %%% cet_ucs4_to_utf8 %%% * * convert single UCS-4 value into UTF-8 sequence @@ -190,86 +165,6 @@ cet_utf8_to_ucs4(const char* str, int* bytes, int* value) return CET_ERROR; /* not valid */ } -/* %%% cet_ucs4_to_char %%% - * - * convert single UCS-4 value to original character from CS - * - * return values: converted character or "CET_NOT_CONVERTABLE_DEFAULT" - * if not possible - */ -short -cet_ucs4_to_char(const int value, const cet_cs_vec_t* vec) -{ - const cet_ucs4_link_t* link; - - if ((link = vec->ucs4_link)) { - int i = 0; - int j = vec->ucs4_links - 1; /* validate ucs value against vec */ - while (i <= j) { - int a = (i + j) >> 1; - int x = link[a].value; - - if (x < value) { - i = a + 1; - } else if (x > value) { - j = a - 1; - } else { - return link[a].origin; - } - } - } - - if ((link = vec->ucs4_extra)) { /* can be NULL */ - int i = 0; - int j = vec->ucs4_extras - 1; - while (i <= j) { - int a = (i + j) >> 1; - int x = link[a].value; - - if (x < value) { - i = a + 1; - } else if (x > value) { - j = a - 1; - } else { - return link[a].origin; - } - } - } - - if (value < vec->ucs4_offset + vec->ucs4_count) { - return (char)value & 0xFF; - } else { - if (vec->fallback && (vec->fallback != vec)) { - return cet_ucs4_to_char(value, vec->fallback); - } else { - return CET_NOT_CONVERTABLE_DEFAULT; - } - } -} - -/* %%% cet_utf8_to_char %%% - * - * Convert single UTF-8 sequence directly into associated characters - * by given character set. 
- */ - -short -cet_utf8_to_char(const char* str, const cet_cs_vec_t* vec, /* out */ int* bytes, int* value) -{ - int b, v; - - cet_utf8_to_ucs4(str, &b, &v); /* decode UTF-8 sequence */ - - if (bytes != nullptr) { - *bytes = b; - } - if (value != nullptr) { - *value = v; - } - - return cet_ucs4_to_char(v, vec); -} - /* =========================================================================== */ /* %%% UTF-8 string manipulation functions %%% */ /* =========================================================================== */ @@ -347,157 +242,3 @@ cet_utf8_strndup(const char* str, const int maxlen) return nullptr; } } - -/* =========================================================================== */ -/* %%% full string transformation %%% */ -/* =========================================================================== */ - -/* %%% cet_str_utf8_to_any %%% - * - * Converts a UTF-8 string to given character set - */ -char* -cet_str_utf8_to_any(const char* src, const cet_cs_vec_t* vec) -{ - const char* c = src; - char* dest; - - if (c == nullptr) { - return nullptr; - } - if (vec->ucs4_count == 0) { - return xstrdup(src); /* UTF-8 -> UTF-8 */ - } - - int len = strlen(c); - char* res = dest = (char*) xmalloc(len + 1); /* target will become smaller or equal length */ - - const char* cend = c + len; - - while (c < cend) { - int bytes; - *dest++ = cet_utf8_to_char(c, vec, &bytes, nullptr); - c += bytes; - } - *dest = '\0'; - - return res; -} - - -/* %%% cet_str_any_to_utf8 %%% - * - * Converts a string from given character set to UTF-8 - */ -char* -cet_str_any_to_utf8(const char* src, const cet_cs_vec_t* vec) -{ - int value; - char* cout; - char temp = CET_NOT_CONVERTABLE_DEFAULT; - - const char* cin = src; - if (cin == nullptr) { - return nullptr; - } - if (vec->ucs4_count == 0) { - return xstrdup(src); /* UTF-8 -> UTF-8 */ - } - - int len = 0; - while (*cin != '\0') { /* determine length of resulting UTF-8 string */ - if (CET_ERROR == cet_char_to_ucs4(*cin++, vec, 
&value)) { - cet_char_to_ucs4(temp, vec, &value); - } - len += cet_ucs4_to_utf8(nullptr, 6, value); - } - - char* result = cout = (char*) xmalloc(len + 1); - cin = src; - - while (*cin != '\0') { - if (CET_ERROR == cet_char_to_ucs4(*cin++, vec, &value)) { - cet_char_to_ucs4(temp, vec, &value); - } - cout += cet_ucs4_to_utf8(cout, 6, value); - } - *cout = '\0'; - return result; -} - -/* %%% cet_str_uni_to_utf8 %%% - * - * Converts an unicode string to UTF-8 - */ -char* -cet_str_uni_to_utf8(const short* src, const int length) -{ - char* cout; - - if (src == nullptr) { - return nullptr; - } - - int len = 0; - int i = length; - unsigned short* cin = (unsigned short*)src; - - while (i-- > 0) { - len += cet_ucs4_to_utf8(nullptr, 6, le_read16(cin++)); - } - - char* res = cout = (char*) xmalloc(len + 1); - cin = (unsigned short*)src; - i = length; - - while (i-- > 0) { - cout += cet_ucs4_to_utf8(cout, 6, le_read16(cin++)); - } - - *cout = '\0'; - - return res; -} - -/* %%% cet_str_any_to_uni %%% - * - * Converts a string in given character set to a 'wide string' (unicode) - */ -short* -cet_str_any_to_uni(const char* src, const cet_cs_vec_t* vec, int* length) -{ - char* utf8; - short* sout; - - if (! 
src) { - utf8 = xstrdup(""); - } else if (vec->ucs4_count == 0) { - utf8 = cet_utf8_strdup(src); /* UTF-8 -> clean UTF-8 */ - } else { - utf8 = cet_str_any_to_utf8(src, vec); - } - - int len = cet_utf8_strlen(utf8); - short* res = sout = (short int*) xcalloc(2, len + 1); - - if (len) { - char* cin = utf8; - - while (*cin) { - int bytes, value; - if (CET_SUCCESS == cet_utf8_to_ucs4(cin, &bytes, &value)) { - le_write16(sout, value); - sout++; - } - cin += bytes; - } - } - - *sout = 0; - if (length) { - *length = len; - } - xfree(utf8); - - return res; -} diff --git a/cet.h b/cet.h index a465e00fa..d3cd129e9 100644 --- a/cet.h +++ b/cet.h @@ -27,52 +27,15 @@ #define CET_ERROR 1 #define CET_SUCCESS 0 -struct cet_ucs4_link_t { - int value; /* UCS-4 value */ - short origin; /* associated character */ -}; - -struct cet_cs_vec_t { - const char* name; /* name of character set */ - const char** alias; /* alias table */ - cet_cs_vec_t* fallback; /* fallback character set */ - void* unused; - const int* ucs4_map; /* char to UCS-4 value table */ - const int ucs4_offset; /* first non standard character */ - const int ucs4_count; /* values in table */ - const cet_ucs4_link_t* ucs4_link; /* UCS-4 to char backward links */ - const int ucs4_links; /* number of links */ - const cet_ucs4_link_t* ucs4_extra; /* Non standard UCS-4 to ... 
*/ - const int ucs4_extras; /* number of extra links */ - cet_cs_vec_t* next; -}; - /* single char/value transmission */ int cet_utf8_to_ucs4(const char* str, int* bytes, int* value); int cet_ucs4_to_utf8(char* dest, size_t dest_size, int value); -/* single char/value transmission - vec based */ - -int cet_char_to_ucs4(char src, const cet_cs_vec_t* vec, int* value); -short cet_utf8_to_char(const char* str, const cet_cs_vec_t* vecint, int* bytes, int* value); -short cet_ucs4_to_char(int value, const cet_cs_vec_t* vec); - -/* string to string - vector based */ - -char* cet_str_utf8_to_any(const char* src, const cet_cs_vec_t* vec); -char* cet_str_any_to_utf8(const char* src, const cet_cs_vec_t* vec); - -char* cet_str_uni_to_utf8(const short* src, int length); - /* UTF-8 string manipulation functions */ unsigned int cet_utf8_strlen(const char* str); char* cet_utf8_strdup(const char* str); char* cet_utf8_strndup(const char* str, int maxlen); -/* unicode functions */ - -short* cet_str_any_to_uni(const char* src, const cet_cs_vec_t* vec, int* length); - #endif diff --git a/cet/ansi_x3_4_1968.h b/cet/ansi_x3_4_1968.h deleted file mode 100644 index 1692b2610..000000000 --- a/cet/ansi_x3_4_1968.h +++ /dev/null @@ -1,505 +0,0 @@ -/* - - Data automatically generated from recode output: - - 'recode -lf "ANSI_X3.4-1968" 2>/dev/null' - - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -*/ - -#ifndef ansi_x3_4_1968_h -#define ansi_x3_4_1968_h - -#define cet_cs_name_ansi_x3_4_1968 "US-ASCII" - -const char *cet_cs_alias_ansi_x3_4_1968[] = -{ - "ANSI_X3.4-1968", "367", "ANSI_X3.4-1986", "ASCII", - "CP367", "csASCII", "IBM367", "ISO646-US", - "ISO646.1991-IRV", "iso-ir-6", "ISO_646.irv:1991", "us", - nullptr -}; - -#define cet_ucs4_ofs_ansi_x3_4_1968 128 -#define cet_ucs4_cnt_ansi_x3_4_1968 1 - -const int cet_ucs4_map_ansi_x3_4_1968[cet_ucs4_cnt_ansi_x3_4_1968] = {0}; - -#define cet_ucs4_to_ansi_x3_4_1968_ct 1 - -const cet_ucs4_link_t cet_ucs4_to_ansi_x3_4_1968_links[cet_ucs4_to_ansi_x3_4_1968_ct] = {{0, 0}}; - -// #define cet_ucs4_to_ansi_x3_4_1968_extra_ct 200 - -const cet_ucs4_link_t cet_ucs4_to_ansi_x3_4_1968_extra[] = -/* ------------------------------------------*/ -/* !!! sorted by UCS-4 value !!! 
*/ -/* ------------------------------------------*/ -{ - {0x00C0, (unsigned char) 'A'}, // latin capital letter a with grave - {0x00C1, (unsigned char) 'A'}, // latin capital letter a with acute - {0x00C2, (unsigned char) 'A'}, // latin capital letter a with circumflex - {0x00C3, (unsigned char) 'A'}, // latin capital letter a with tilde - {0x00C4, (unsigned char) 'A'}, // latin capital letter a with diaeresis - {0x00C5, (unsigned char) 'A'}, // latin capital letter a with ring above - {0x00C7, (unsigned char) 'C'}, // latin capital letter c with cedilla - {0x00C8, (unsigned char) 'E'}, // latin capital letter e with grave - {0x00C9, (unsigned char) 'E'}, // latin capital letter e with acute - {0x00CA, (unsigned char) 'E'}, // latin capital letter e with circumflex - {0x00CB, (unsigned char) 'E'}, // latin capital letter e with diaeresis - {0x00CC, (unsigned char) 'I'}, // latin capital letter i with grave - {0x00CD, (unsigned char) 'I'}, // latin capital letter i with acute - {0x00CE, (unsigned char) 'I'}, // latin capital letter i with circumflex - {0x00CF, (unsigned char) 'I'}, // latin capital letter i with diaeresis - {0x00D1, (unsigned char) 'N'}, // latin capital letter n with tilde - {0x00D2, (unsigned char) 'O'}, // latin capital letter o with grave - {0x00D3, (unsigned char) 'O'}, // latin capital letter o with acute - {0x00D4, (unsigned char) 'O'}, // latin capital letter o with circumflex - {0x00D5, (unsigned char) 'O'}, // latin capital letter o with tilde - {0x00D6, (unsigned char) 'O'}, // latin capital letter o with diaeresis - {0x00D8, (unsigned char) 'O'}, // latin capital letter o with stroke - {0x00D9, (unsigned char) 'U'}, // latin capital letter u with grave - {0x00DA, (unsigned char) 'U'}, // latin capital letter u with acute - {0x00DB, (unsigned char) 'U'}, // latin capital letter u with circumflex - {0x00DC, (unsigned char) 'U'}, // latin capital letter u with diaeresis - {0x00DD, (unsigned char) 'Y'}, // latin capital letter y with acute - 
{0x00E0, (unsigned char) 'a'}, // latin small letter a with grave - {0x00E1, (unsigned char) 'a'}, // latin small letter a with acute - {0x00E2, (unsigned char) 'a'}, // latin small letter a with circumflex - {0x00E3, (unsigned char) 'a'}, // latin small letter a with tilde - {0x00E4, (unsigned char) 'a'}, // latin small letter a with diaeresis - {0x00E5, (unsigned char) 'a'}, // latin small letter a with ring above - {0x00E7, (unsigned char) 'c'}, // latin small letter c with cedilla - {0x00E8, (unsigned char) 'e'}, // latin small letter e with grave - {0x00E9, (unsigned char) 'e'}, // latin small letter e with acute - {0x00EA, (unsigned char) 'e'}, // latin small letter e with circumflex - {0x00EB, (unsigned char) 'e'}, // latin small letter e with diaeresis - {0x00EC, (unsigned char) 'i'}, // latin small letter i with grave - {0x00ED, (unsigned char) 'i'}, // latin small letter i with acute - {0x00EE, (unsigned char) 'i'}, // latin small letter i with circumflex - {0x00EF, (unsigned char) 'i'}, // latin small letter i with diaeresis - {0x00F1, (unsigned char) 'n'}, // latin small letter n with tilde - {0x00F2, (unsigned char) 'o'}, // latin small letter o with grave - {0x00F3, (unsigned char) 'o'}, // latin small letter o with acute - {0x00F4, (unsigned char) 'o'}, // latin small letter o with circumflex - {0x00F5, (unsigned char) 'o'}, // latin small letter o with tilde - {0x00F6, (unsigned char) 'o'}, // latin small letter o with diaeresis - {0x00F8, (unsigned char) 'o'}, // latin small letter o with stroke - {0x00F9, (unsigned char) 'u'}, // latin small letter u with grave - {0x00FA, (unsigned char) 'u'}, // latin small letter u with acute - {0x00FB, (unsigned char) 'u'}, // latin small letter u with circumflex - {0x00FC, (unsigned char) 'u'}, // latin small letter u with diaeresis - {0x00FD, (unsigned char) 'y'}, // latin small letter y with acute - {0x00FF, (unsigned char) 'y'}, // latin small letter y with diaeresis - {0x0100, (unsigned char) 'A'}, // 
latin capital letter a with macron - {0x0101, (unsigned char) 'a'}, // latin small letter a with macron - {0x0102, (unsigned char) 'A'}, // latin capital letter a with breve - {0x0103, (unsigned char) 'a'}, // latin small letter a with breve - {0x0104, (unsigned char) 'A'}, // latin capital letter a with ogonek - {0x0105, (unsigned char) 'a'}, // latin small letter a with ogonek - {0x0106, (unsigned char) 'C'}, // latin capital letter c with acute - {0x0107, (unsigned char) 'c'}, // latin small letter c with acute - {0x0108, (unsigned char) 'C'}, // latin capital letter c with circumflex - {0x0109, (unsigned char) 'c'}, // latin small letter c with circumflex - {0x010A, (unsigned char) 'C'}, // latin capital letter c with dot above - {0x010B, (unsigned char) 'c'}, // latin small letter c with dot above - {0x010C, (unsigned char) 'C'}, // latin capital letter c with caron - {0x010D, (unsigned char) 'c'}, // latin small letter c with caron - {0x010E, (unsigned char) 'D'}, // latin capital letter d with caron - {0x010F, (unsigned char) 'd'}, // latin small letter d with caron - {0x0110, (unsigned char) 'D'}, // latin capital letter d with stroke - {0x0111, (unsigned char) 'd'}, // latin small letter d with stroke - {0x0112, (unsigned char) 'E'}, // latin capital letter e with macron - {0x0113, (unsigned char) 'e'}, // latin small letter e with macron - {0x0114, (unsigned char) 'E'}, // latin capital letter e with breve - {0x0115, (unsigned char) 'e'}, // latin small letter e with breve - {0x0116, (unsigned char) 'E'}, // latin capital letter e with dot above - {0x0117, (unsigned char) 'e'}, // latin small letter e with dot above - {0x0118, (unsigned char) 'E'}, // latin capital letter e with ogonek - {0x0119, (unsigned char) 'e'}, // latin small letter e with ogonek - {0x011A, (unsigned char) 'E'}, // latin capital letter e with caron - {0x011B, (unsigned char) 'e'}, // latin small letter e with caron - {0x011C, (unsigned char) 'G'}, // latin capital letter g with 
circumflex - {0x011D, (unsigned char) 'g'}, // latin small letter g with circumflex - {0x011E, (unsigned char) 'G'}, // latin capital letter g with breve - {0x011F, (unsigned char) 'g'}, // latin small letter g with breve - {0x0120, (unsigned char) 'G'}, // latin capital letter g with dot above - {0x0121, (unsigned char) 'g'}, // latin small letter g with dot above - {0x0122, (unsigned char) 'G'}, // latin capital letter g with cedilla - {0x0123, (unsigned char) 'g'}, // latin small letter g with cedilla - {0x0124, (unsigned char) 'H'}, // latin capital letter h with circumflex - {0x0125, (unsigned char) 'h'}, // latin small letter h with circumflex - {0x0126, (unsigned char) 'H'}, // latin capital letter h with stroke - {0x0127, (unsigned char) 'h'}, // latin small letter h with stroke - {0x0128, (unsigned char) 'I'}, // latin capital letter i with tilde - {0x0129, (unsigned char) 'i'}, // latin small letter i with tilde - {0x012A, (unsigned char) 'I'}, // latin capital letter i with macron - {0x012B, (unsigned char) 'i'}, // latin small letter i with macron - {0x012C, (unsigned char) 'I'}, // latin capital letter i with breve - {0x012D, (unsigned char) 'i'}, // latin small letter i with breve - {0x012E, (unsigned char) 'I'}, // latin capital letter i with ogonek - {0x012F, (unsigned char) 'i'}, // latin small letter i with ogonek - {0x0130, (unsigned char) 'I'}, // latin capital letter i with dot above - {0x0131, (unsigned char) 'i'}, // latin small letter dotless i - {0x0134, (unsigned char) 'J'}, // latin capital letter j with circumflex - {0x0135, (unsigned char) 'j'}, // latin small letter j with circumflex - {0x0136, (unsigned char) 'K'}, // latin capital letter k with cedilla - {0x0137, (unsigned char) 'k'}, // latin small letter k with cedilla - {0x0139, (unsigned char) 'L'}, // latin capital letter l with acute - {0x013A, (unsigned char) 'l'}, // latin small letter l with acute - {0x013B, (unsigned char) 'L'}, // latin capital letter l with cedilla - 
{0x013C, (unsigned char) 'l'}, // latin small letter l with cedilla - {0x013D, (unsigned char) 'L'}, // latin capital letter l with caron - {0x013E, (unsigned char) 'l'}, // latin small letter l with caron - {0x0141, (unsigned char) 'L'}, // latin capital letter l with stroke - {0x0142, (unsigned char) 'l'}, // latin small letter l with stroke - {0x0143, (unsigned char) 'N'}, // latin capital letter n with acute - {0x0144, (unsigned char) 'n'}, // latin small letter n with acute - {0x0145, (unsigned char) 'N'}, // latin capital letter n with cedilla - {0x0146, (unsigned char) 'n'}, // latin small letter n with cedilla - {0x0147, (unsigned char) 'N'}, // latin capital letter n with caron - {0x0148, (unsigned char) 'n'}, // latin small letter n with caron - {0x014C, (unsigned char) 'O'}, // latin capital letter o with macron - {0x014D, (unsigned char) 'o'}, // latin small letter o with macron - {0x014E, (unsigned char) 'O'}, // latin capital letter o with breve - {0x014F, (unsigned char) 'o'}, // latin small letter o with breve - {0x0150, (unsigned char) 'O'}, // latin capital letter o with double acute - {0x0151, (unsigned char) 'o'}, // latin small letter o with double acute - {0x0152, (unsigned char) 'O'}, // latin capital ligature oe - {0x0153, (unsigned char) 'o'}, // latin small ligature oe - {0x0154, (unsigned char) 'R'}, // latin capital letter r with acute - {0x0155, (unsigned char) 'r'}, // latin small letter r with acute - {0x0156, (unsigned char) 'R'}, // latin capital letter r with cedilla - {0x0157, (unsigned char) 'r'}, // latin small letter r with cedilla - {0x0158, (unsigned char) 'R'}, // latin capital letter r with caron - {0x0159, (unsigned char) 'r'}, // latin small letter r with caron - {0x015A, (unsigned char) 'S'}, // latin capital letter s with acute - {0x015B, (unsigned char) 's'}, // latin small letter s with acute - {0x015C, (unsigned char) 'S'}, // latin capital letter s with circumflex - {0x015D, (unsigned char) 's'}, // latin small 
letter s with circumflex - {0x015E, (unsigned char) 'S'}, // latin capital letter s with cedilla - {0x015F, (unsigned char) 's'}, // latin small letter s with cedilla - {0x0160, (unsigned char) 'S'}, // latin capital letter s with caron - {0x0161, (unsigned char) 's'}, // latin small letter s with caron - {0x0162, (unsigned char) 'T'}, // latin capital letter t with cedilla - {0x0163, (unsigned char) 't'}, // latin small letter t with cedilla - {0x0164, (unsigned char) 'T'}, // latin capital letter t with caron - {0x0165, (unsigned char) 't'}, // latin small letter t with caron - {0x0166, (unsigned char) 'T'}, // latin capital letter t with stroke - {0x0167, (unsigned char) 't'}, // latin small letter t with stroke - {0x0168, (unsigned char) 'U'}, // latin capital letter u with tilde - {0x0169, (unsigned char) 'u'}, // latin small letter u with tilde - {0x016A, (unsigned char) 'U'}, // latin capital letter u with macron - {0x016B, (unsigned char) 'u'}, // latin small letter u with macron - {0x016C, (unsigned char) 'U'}, // latin capital letter u with breve - {0x016D, (unsigned char) 'u'}, // latin small letter u with breve - {0x016E, (unsigned char) 'U'}, // latin capital letter u with ring above - {0x016F, (unsigned char) 'u'}, // latin small letter u with ring above - {0x0170, (unsigned char) 'U'}, // latin capital letter u with double acute - {0x0171, (unsigned char) 'u'}, // latin small letter u with double acute - {0x0172, (unsigned char) 'U'}, // latin capital letter u with ogonek - {0x0173, (unsigned char) 'u'}, // latin small letter u with ogonek - {0x0174, (unsigned char) 'W'}, // latin capital letter w with circumflex - {0x0175, (unsigned char) 'w'}, // latin small letter w with circumflex - {0x0176, (unsigned char) 'Y'}, // latin capital letter y with circumflex - {0x0177, (unsigned char) 'y'}, // latin small letter y with circumflex - {0x0178, (unsigned char) 'Y'}, // latin capital letter y with diaeresis - {0x0179, (unsigned char) 'Z'}, // latin 
capital letter z with acute - {0x017A, (unsigned char) 'z'}, // latin small letter z with acute - {0x017B, (unsigned char) 'Z'}, // latin capital letter z with dot above - {0x017C, (unsigned char) 'z'}, // latin small letter z with dot above - {0x017D, (unsigned char) 'Z'}, // latin capital letter z with caron - {0x017E, (unsigned char) 'z'}, // latin small letter z with caron - {0x0180, (unsigned char) 'b'}, // latin small letter b with stroke - {0x0189, (unsigned char) 'D'}, // latin capital letter african d - {0x0191, (unsigned char) 'F'}, // latin capital letter f with hook - {0x0192, (unsigned char) 'f'}, // latin small letter f with hook - {0x0197, (unsigned char) 'I'}, // latin capital letter i with stroke - {0x019A, (unsigned char) 'l'}, // latin small letter l with bar - {0x019F, (unsigned char) 'O'}, // latin capital letter o with middle tilde - {0x01A0, (unsigned char) 'O'}, // latin capital letter o with horn - {0x01A1, (unsigned char) 'o'}, // latin small letter o with horn - {0x01AB, (unsigned char) 't'}, // latin small letter t with palatal hook - {0x01AE, (unsigned char) 'T'}, // latin capital letter t with retroflex hook - {0x01AF, (unsigned char) 'U'}, // latin capital letter u with horn - {0x01B0, (unsigned char) 'u'}, // latin small letter u with horn - {0x01B6, (unsigned char) 'z'}, // latin small letter z with stroke - {0x01C0, (unsigned char) '|'}, // latin letter dental click - {0x01C3, (unsigned char) '!'}, // latin letter retroflex click - {0x01CD, (unsigned char) 'A'}, // latin capital letter a with caron - {0x01CE, (unsigned char) 'a'}, // latin small letter a with caron - {0x01CF, (unsigned char) 'I'}, // latin capital letter i with caron - {0x01D0, (unsigned char) 'i'}, // latin small letter i with caron - {0x01D1, (unsigned char) 'O'}, // latin capital letter o with caron - {0x01D2, (unsigned char) 'o'}, // latin small letter o with caron - {0x01D3, (unsigned char) 'U'}, // latin capital letter u with caron - {0x01D4, (unsigned char) 
'u'}, // latin small letter u with caron - {0x01D5, (unsigned char) 'U'}, // latin capital letter u with diaeresis and macron - {0x01D6, (unsigned char) 'u'}, // latin small letter u with diaeresis and macron - {0x01D7, (unsigned char) 'U'}, // latin capital letter u with diaeresis and acute - {0x01D8, (unsigned char) 'u'}, // latin small letter u with diaeresis and acute - {0x01D9, (unsigned char) 'U'}, // latin capital letter u with diaeresis and caron - {0x01DA, (unsigned char) 'u'}, // latin small letter u with diaeresis and caron - {0x01DB, (unsigned char) 'U'}, // latin capital letter u with diaeresis and grave - {0x01DC, (unsigned char) 'u'}, // latin small letter u with diaeresis and grave - {0x01DE, (unsigned char) 'A'}, // latin capital letter a with diaeresis and macron - {0x01DF, (unsigned char) 'a'}, // latin small letter a with diaeresis and macron - {0x01E4, (unsigned char) 'G'}, // latin capital letter g with stroke - {0x01E5, (unsigned char) 'g'}, // latin small letter g with stroke - {0x01E6, (unsigned char) 'G'}, // latin capital letter g with caron - {0x01E7, (unsigned char) 'g'}, // latin small letter g with caron - {0x01E8, (unsigned char) 'K'}, // latin capital letter k with caron - {0x01E9, (unsigned char) 'k'}, // latin small letter k with caron - {0x01EA, (unsigned char) 'O'}, // latin capital letter o with ogonek - {0x01EB, (unsigned char) 'o'}, // latin small letter o with ogonek - {0x01EC, (unsigned char) 'O'}, // latin capital letter o with ogonek and macron - {0x01ED, (unsigned char) 'o'}, // latin small letter o with ogonek and macron - {0x01F0, (unsigned char) 'j'}, // latin small letter j with caron - {0x0261, (unsigned char) 'g'}, // latin small letter script g - {0x02B9, (unsigned char) '\''}, // modifier letter prime - {0x02BA, (unsigned char) '"'}, // modifier letter double prime - {0x02BC, (unsigned char) '\''}, // modifier letter apostrophe - {0x02C4, (unsigned char) '^'}, // modifier letter up arrowhead - {0x02C6, (unsigned 
char) '^'}, // modifier letter circumflex accent - {0x02C7, (unsigned char) '^'}, // caron - {0x02C8, (unsigned char) '\''}, // modifier letter vertical line - {0x02CB, (unsigned char) '`'}, // modifier letter grave accent - {0x02CD, (unsigned char) '_'}, // modifier letter low macron - {0x02D8, (unsigned char) '^'}, // circumflex accent - {0x02D9, (unsigned char) '\''}, // dot above - {0x02DC, (unsigned char) '~'}, // small tilde - {0x0300, (unsigned char) '`'}, // combining grave accent - {0x0302, (unsigned char) '^'}, // combining circumflex accent - {0x0303, (unsigned char) '~'}, // combining tilde - {0x030E, (unsigned char) '"'}, // combining double vertical line above - {0x0331, (unsigned char) '_'}, // combining macron below - {0x0332, (unsigned char) '_'}, // combining low line - {0x037E, (unsigned char) ';'}, // greek question mark - {0x0393, (unsigned char) 'G'}, // greek capital letter gamma - {0x0398, (unsigned char) 'T'}, // greek capital letter theta - {0x03A3, (unsigned char) 'S'}, // greek capital letter sigma - {0x03A6, (unsigned char) 'F'}, // greek capital letter phi - {0x03A9, (unsigned char) 'O'}, // greek capital letter omega - {0x03B1, (unsigned char) 'a'}, // greek small letter alpha - {0x03B4, (unsigned char) 'd'}, // greek small letter delta - {0x03B5, (unsigned char) 'e'}, // greek small letter epsilon - {0x03C0, (unsigned char) 'p'}, // greek small letter pi - {0x03C3, (unsigned char) 's'}, // greek small letter sigma - {0x03C4, (unsigned char) 't'}, // greek small letter tau - {0x03C6, (unsigned char) 'f'}, // greek small letter phi - {0x04BB, (unsigned char) 'h'}, // cyrillic small letter shha - {0x0589, (unsigned char) ':'}, // armenian full stop - {0x066A, (unsigned char) '%'}, // arabic percent sign - {0x1E02, (unsigned char) 'B'}, // latin capital letter b with dot above - {0x1E03, (unsigned char) 'b'}, // latin small letter b with dot above - {0x1E0A, (unsigned char) 'D'}, // latin capital letter d with dot above - {0x1E0B, 
(unsigned char) 'd'}, // latin small letter d with dot above - {0x1E1E, (unsigned char) 'F'}, // latin capital letter f with dot above - {0x1E1F, (unsigned char) 'f'}, // latin small letter f with dot above - {0x1E40, (unsigned char) 'M'}, // latin capital letter m with dot above - {0x1E41, (unsigned char) 'm'}, // latin small letter m with dot above - {0x1E56, (unsigned char) 'P'}, // latin capital letter p with dot above - {0x1E57, (unsigned char) 'p'}, // latin small letter p with dot above - {0x1E60, (unsigned char) 'S'}, // latin capital letter s with dot above - {0x1E61, (unsigned char) 's'}, // latin small letter s with dot above - {0x1E6A, (unsigned char) 'T'}, // latin capital letter t with dot above - {0x1E6B, (unsigned char) 't'}, // latin small letter t with dot above - {0x1E80, (unsigned char) 'W'}, // latin capital letter w with grave - {0x1E81, (unsigned char) 'w'}, // latin small letter w with grave - {0x1E82, (unsigned char) 'W'}, // latin capital letter w with acute - {0x1E83, (unsigned char) 'w'}, // latin small letter w with acute - {0x1E84, (unsigned char) 'W'}, // latin capital letter w with diaeresis - {0x1E85, (unsigned char) 'w'}, // latin small letter w with diaeresis - {0x1E94, (unsigned char) 'u'}, - {0x1EF2, (unsigned char) 'Y'}, // latin capital letter y with grave - {0x1EF3, (unsigned char) 'y'}, // latin small letter y with grave - {0x2000, (unsigned char) ' '}, // en quad - {0x2001, (unsigned char) ' '}, // em quad - {0x2002, (unsigned char) ' '}, // en space - {0x2003, (unsigned char) ' '}, // em space - {0x2004, (unsigned char) ' '}, // three-per-em space - {0x2005, (unsigned char) ' '}, // four-per-em space - {0x2006, (unsigned char) ' '}, // six-per-em space - {0x2010, (unsigned char) '-'}, // hyphen - {0x2011, (unsigned char) '-'}, // non-breaking hyphen - {0x2013, (unsigned char) '-'}, // en dash - {0x2014, (unsigned char) '-'}, // em dash - {0x2017, (unsigned char) '='}, // double low line - {0x2018, (unsigned char) '`'}, // 
left single quotation mark - {0x2019, (unsigned char) '\''}, // right single quotation mark - {0x201C, (unsigned char) '"'}, - {0x201D, (unsigned char) '"'}, - {0x2032, (unsigned char) '\''}, // prime - {0x2035, (unsigned char) '`'}, // reversed prime - {0x203C, (unsigned char) '!'}, // double exclamation mark - {0x2044, (unsigned char) '/'}, // fraction slash - {0x2074, (unsigned char) '4'}, // superscript four - {0x2075, (unsigned char) '5'}, // superscript five - {0x2076, (unsigned char) '6'}, // superscript six - {0x2077, (unsigned char) '7'}, // superscript seven - {0x2078, (unsigned char) '8'}, // superscript eight - {0x207F, (unsigned char) 'n'}, // superscript latin small letter n - {0x2080, (unsigned char) '0'}, // subscript zero - {0x2081, (unsigned char) '1'}, // subscript one - {0x2082, (unsigned char) '2'}, // subscript two - {0x2083, (unsigned char) '3'}, // subscript three - {0x2084, (unsigned char) '4'}, // subscript four - {0x2085, (unsigned char) '5'}, // subscript five - {0x2086, (unsigned char) '6'}, // subscript six - {0x2087, (unsigned char) '7'}, // subscript seven - {0x2088, (unsigned char) '8'}, // subscript eight - {0x2089, (unsigned char) '9'}, // subscript nine - {0x20A7, (unsigned char) 'P'}, // peseta sign - {0x2102, (unsigned char) 'C'}, // double-struck capital c - {0x2107, (unsigned char) 'E'}, // euler constant - {0x210A, (unsigned char) 'g'}, // script small g - {0x210B, (unsigned char) 'H'}, // script capital h - {0x210C, (unsigned char) 'H'}, // black-letter capital h - {0x210D, (unsigned char) 'H'}, // double-struck capital h - {0x210E, (unsigned char) 'h'}, // planck constant - {0x2110, (unsigned char) 'I'}, // script capital i - {0x2111, (unsigned char) 'I'}, // black-letter capital i - {0x2112, (unsigned char) 'L'}, // script capital l - {0x2113, (unsigned char) 'l'}, // script small l - {0x2115, (unsigned char) 'N'}, // double-struck capital n - {0x2118, (unsigned char) 'P'}, // script capital p - {0x2119, (unsigned char) 
'P'}, // double-struck capital p - {0x211A, (unsigned char) 'Q'}, // double-struck capital q - {0x211B, (unsigned char) 'R'}, // script capital r - {0x211C, (unsigned char) 'R'}, // black-letter capital r - {0x211D, (unsigned char) 'R'}, // double-struck capital r - {0x2124, (unsigned char) 'Z'}, // double-struck capital z - {0x2128, (unsigned char) 'Z'}, // black-letter capital z - {0x212A, (unsigned char) 'K'}, // kelvin sign - {0x212C, (unsigned char) 'B'}, // script capital b - {0x212D, (unsigned char) 'C'}, // black-letter capital c - {0x212E, (unsigned char) 'e'}, // estimated symbol - {0x212F, (unsigned char) 'e'}, // script small e - {0x2130, (unsigned char) 'E'}, // script capital e - {0x2131, (unsigned char) 'F'}, // script capital f - {0x2133, (unsigned char) 'M'}, // script capital m - {0x2134, (unsigned char) 'o'}, // script small o - {0x2190, (unsigned char) '<'}, // leftwards arrow - {0x2191, (unsigned char) '^'}, // upwards arrow - {0x2192, (unsigned char) '>'}, // rightwards arrow - {0x2193, (unsigned char) 'v'}, // downwards arrow - {0x2194, (unsigned char) '-'}, // left right arrow - {0x2195, (unsigned char) '|'}, // up down arrow - {0x21A8, (unsigned char) '|'}, // up down arrow with base - {0x2212, (unsigned char) '-'}, // minus sign - {0x2215, (unsigned char) '/'}, // division slash - {0x2216, (unsigned char) '\\'}, // set minus - {0x2217, (unsigned char) '*'}, // asterisk operator - {0x221A, (unsigned char) 'v'}, // square root - {0x221E, (unsigned char) '8'}, // infinity - {0x221F, (unsigned char) 'L'}, // right angle - {0x2223, (unsigned char) '|'}, // divides - {0x2229, (unsigned char) 'n'}, // intersection - {0x2236, (unsigned char) ':'}, // ratio - {0x223C, (unsigned char) '~'}, // tilde operator - {0x2261, (unsigned char) '='}, // identical to - {0x2264, (unsigned char) '='}, // less-than or equal to - {0x2265, (unsigned char) '='}, // greater-than or equal to - {0x2303, (unsigned char) '^'}, // up arrowhead - {0x2320, (unsigned char) 
'('}, // top half integral - {0x2321, (unsigned char) ')'}, // bottom half integral - {0x2329, (unsigned char) '<'}, // left-pointing angle bracket - {0x232A, (unsigned char) '>'}, // right-pointing angle bracket - {0x25AC, (unsigned char) '-'}, // black rectangle - {0x25B2, (unsigned char) '^'}, // black up-pointing triangle - {0x25BA, (unsigned char) '>'}, // black right-pointing pointer - {0x25C4, (unsigned char) '<'}, // black left-pointing pointer - {0x25CB, (unsigned char) '0'}, // white circle - {0x25D9, (unsigned char) '0'}, // inverse white circle - {0x263A, (unsigned char) 'O'}, // white smiling face - {0x263B, (unsigned char) 'O'}, // black smiling face - {0x263C, (unsigned char) '0'}, // white sun with rays - {0x2640, (unsigned char) '+'}, // female sign - {0x2642, (unsigned char) '>'}, // male sign - {0x266A, (unsigned char) 'd'}, // eighth note - {0x266B, (unsigned char) 'd'}, // beamed eighth notes - {0x2758, (unsigned char) '|'}, // light vertical bar - {0x3000, (unsigned char) ' '}, // ideographic space - {0x3008, (unsigned char) '<'}, // left angle bracket - {0x3009, (unsigned char) '>'}, // right angle bracket - {0x301A, (unsigned char) '['}, // left white square bracket - {0x301B, (unsigned char) ']'}, // right white square bracket - {0x301D, (unsigned char) '"'}, // reversed double prime quotation mark - {0x301E, (unsigned char) '"'} // double prime quotation mark -}; - -cet_cs_vec_t cet_cs_vec_ansi_x3_4_1968 = /* defined in cet.h */ -{ - cet_cs_name_ansi_x3_4_1968, /* name of character set */ - cet_cs_alias_ansi_x3_4_1968, /* alias table */ - - nullptr, /* fallback character set */ - nullptr, /* unused */ - - cet_ucs4_map_ansi_x3_4_1968, /* char to UCS-4 value table */ - cet_ucs4_ofs_ansi_x3_4_1968, /* first non standard character */ - cet_ucs4_cnt_ansi_x3_4_1968, /* number of values in table */ - - cet_ucs4_to_ansi_x3_4_1968_links, /* UCS-4 to char links */ - cet_ucs4_to_ansi_x3_4_1968_ct, /* number of links */ - - 
cet_ucs4_to_ansi_x3_4_1968_extra, /* hand made UCS-4 links */ - sizeof(cet_ucs4_to_ansi_x3_4_1968_extra) / sizeof(cet_ucs4_to_ansi_x3_4_1968_extra[0]), /* number of extra links */ - - nullptr /* for internal use */ -}; - - -/* -const int ansi_x3_4_1968_ucs4_full_map[] = -{ - 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, - 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, - 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, - 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, - 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, - 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, - 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, - 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, - 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, - 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, - 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, - 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, - 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, - 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, - 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, - 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1 -}; -*/ - -#endif diff --git a/cet/cp1252.h b/cet/cp1252.h deleted file mode 100644 index 458b54015..000000000 --- 
a/cet/cp1252.h +++ /dev/null @@ -1,210 +0,0 @@ -/* - - Data automatically generated from recode output: - - 'recode -lf "CP1252" 2>/dev/null' - - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -*/ - -#ifndef cp1252_h -#define cp1252_h - -#define cet_cs_name_cp1252 "CP1252" - -const char *cet_cs_alias_cp1252[] = -{ - "CP1252", "1252", "ms-ansi", "windows-1252", "WIN-CP1252", - nullptr -}; - -#define cet_ucs4_ofs_cp1252 128 -#define cet_ucs4_cnt_cp1252 128 - -const int cet_ucs4_map_cp1252[cet_ucs4_cnt_cp1252] = -{ - 0x20ac, -1, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021, - 0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, -1, 0x017d, -1, - -1, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, - 0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, -1, 0x017e, 0x0178, - 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, - 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, - 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, - 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, - 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, - 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, - 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, - 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, - 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 
0x00e6, 0x00e7, - 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, - 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, - 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff -}; - -#define cet_ucs4_to_cp1252_ct 27 - -const cet_ucs4_link_t cet_ucs4_to_cp1252_links[cet_ucs4_to_cp1252_ct] = -{ - {0x0152, 0x8c} /* capital ligature oe */, - {0x0153, 0x9c} /* small ligature oe */, - {0x0160, 0x8a} /* capital letter s with caron */, - {0x0161, 0x9a} /* small letter s with caron */, - {0x0178, 0x9f} /* capital letter y with diaeresis */, - {0x017d, 0x8e} /* capital letter z with caron */, - {0x017e, 0x9e} /* small letter z with caron */, - {0x0192, 0x83} /* minuscule latine f hameçon */, - {0x02c6, 0x88} /* modificative accent circonflexe */, - {0x02dc, 0x98} /* tilde */, - {0x2013, 0x96} /* dash */, - {0x2014, 0x97} /* dash */, - {0x2018, 0x91} /* single quotation mark */, - {0x2019, 0x92} /* single quotation mark */, - {0x201a, 0x82} /* low-9 quotation mark */, - {0x201c, 0x93} /* double quotation mark */, - {0x201d, 0x94} /* double quotation mark */, - {0x201e, 0x84} /* low-9 quotation mark */, - {0x2020, 0x86} /* dagger */, - {0x2021, 0x87} /* dagger */, - {0x2022, 0x95} /* puce */, - {0x2026, 0x85} /* horizontal ellipsis */, - {0x2030, 0x89} /* mille sign */, - {0x2039, 0x8b} /* left-pointing angle quotation mark */, - {0x203a, 0x9b} /* right-pointing angle quotation mark */, - {0x20ac, 0x80} /* euro */, - {0x2122, 0x99} /* mark sign */ -}; - -/* Extra table was generated from bestfit1252.txt located at - ftp.unicode.org:/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/ */ - -const cet_ucs4_link_t cet_ucs4_to_cp1252_extra[] = -{ - {0x0110, 0xd0} /* latin capital letter d with stroke */, - {0x0189, 0xd0} /* latin capital letter african d */, - {0x0191, 0x83} /* latin capital letter f with hook */, - {0x02c9, 0xaf} /* modifier letter macron */, - {0x02ca, 0xb4} /* modifier letter acute accent */, - {0x02da, 0xb0} /* ring above */, - 
{0x0301, 0xb4} /* combining acute accent */, - {0x0304, 0xaf} /* combining macron */, - {0x0305, 0xaf} /* combining overline */, - {0x0308, 0xa8} /* combining diaeresis */, - {0x030a, 0xb0} /* combining ring above */, - {0x0327, 0xb8} /* combining cedilla */, - {0x03b2, 0xdf} /* greek small letter beta */, - {0x03bc, 0xb5} /* greek small letter mu */, - {0x2024, 0xb7} /* one dot leader */, - {0x2070, 0xb0} /* superscript zero */, - {0x20a1, 0xa2} /* colon sign */, - {0x20a4, 0xa3} /* lira sign */, - {0x212b, 0xc5} /* angstrom sign */, - {0x2205, 0xd8} /* empty set */, - {0x2213, 0xb1} /* minus-or-plus sign */, - {0x2218, 0xb0} /* ring operator */, - {0x2219, 0xb7} /* bullet operator */, - {0x2248, 0x98} /* almost equal to */, - {0x226a, 0xab} /* much less-than */, - {0x226b, 0xbb} /* much greater-than */, - {0x22c5, 0xb7} /* dot operator */, - {0x2302, 0xa6} /* house */, - {0x2310, 0xac} /* reversed not sign */, - {0x2502, 0xa6} /* box drawings light vertical */, - {0x2524, 0xa6} /* box drawings light vertical and left */, - {0x2551, 0xa6} /* box drawings double vertical */, - {0x255e, 0xa6} /* box drawings vertical single and right double */, - {0x255f, 0xa6} /* box drawings vertical double and right single */, - {0x2560, 0xa6} /* box drawings double vertical and right */, - {0x2561, 0xa6} /* box drawings vertical single and left double */, - {0x2562, 0xa6} /* box drawings vertical double and left single */, - {0x2563, 0xa6} /* box drawings double vertical and left */, - {0x2580, 0xaf} /* upper half block */, - {0x2588, 0xa6} /* full block */, - {0x258c, 0xa6} /* left half block */, - {0x2590, 0xa6} /* right half block */, - {0x2591, 0xa6} /* light shade */, - {0x2592, 0xa6} /* medium shade */, - {0x2593, 0xa6} /* dark shade */, - {0x25a0, 0xa6} /* black square */, - {0x263c, 0xa4} /* white sun with rays */, - {0x300a, 0xab} /* left double angle bracket */, - {0x300b, 0xbb} /* right double angle bracket */, - {0x30fb, 0xb7} /* katakana middle dot */ -}; - -#define 
cet_ucs4_to_cp1252_extra_ct sizeof(cet_ucs4_to_cp1252_extra) / sizeof(cet_ucs4_to_cp1252_extra[0]) - -cet_cs_vec_t cet_cs_vec_cp1252 = /* defined in cet.h */ -{ - cet_cs_name_cp1252, /* name of character set */ - cet_cs_alias_cp1252, /* alias table */ - - nullptr, /* ... to UCS-4 converter (multi-byte) */ - nullptr, /* UCS-4 to ... converter (multi-byte) */ - - cet_ucs4_map_cp1252, /* char to UCS-4 value table */ - cet_ucs4_ofs_cp1252, /* first non standard character */ - cet_ucs4_cnt_cp1252, /* number of values in table */ - - cet_ucs4_to_cp1252_links, /* UCS-4 to char links */ - cet_ucs4_to_cp1252_ct, /* number of links */ - - cet_ucs4_to_cp1252_extra, /* hand made UCS-4 links */ - cet_ucs4_to_cp1252_extra_ct, /* number of extra links */ - - nullptr /* for internal use */ -}; - - -/* -const int cp1252_ucs4_full_map[] = -{ - 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, - 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, - 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, - 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, - 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, - 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, - 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, - 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, - 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, - 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, - 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, - 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, - 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, - 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, - 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, - 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, - 0x20ac, -1, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021, - 0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, -1, 
0x017d, 0x017e, - -1, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, - 0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, -1, -1, 0x0178, - 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, - 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, - 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, - 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, - 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, - 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, - 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, - 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, - 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, - 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, - 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, - 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff -}; -*/ - -#endif diff --git a/cet/iso_8859_8.h b/cet/iso_8859_8.h deleted file mode 100644 index 4801a396d..000000000 --- a/cet/iso_8859_8.h +++ /dev/null @@ -1,156 +0,0 @@ -/* - - Data automatically generated from recode output: - - 'recode -lf "ISO-8859-8" 2>/dev/null' - - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
- -*/ - -#ifndef iso_8859_8_h -#define iso_8859_8_h - -#define cet_cs_name_iso_8859_8 "ISO-8859-8" - -const char *cet_cs_alias_iso_8859_8[] = -{ - "ISO-8859-8", "csISOLatinHebrew", "hebrew", "ISO8859-8", - "iso-ir-138", "ISO_8859-8", "ISO_8859-8:1988", nullptr -}; - -#define cet_ucs4_ofs_iso_8859_8 161 -#define cet_ucs4_cnt_iso_8859_8 90 - -const int cet_ucs4_map_iso_8859_8[cet_ucs4_cnt_iso_8859_8] = -{ - -1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, - 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x203e, 0x00b0, - 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, - 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 0x2017, 0x05d0, - 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7, 0x05d8, - 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df, 0x05e0, - 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7, 0x05e8, - 0x05e9, 0x05ea -}; - -#define cet_ucs4_to_iso_8859_8_ct 31 - -const cet_ucs4_link_t cet_ucs4_to_iso_8859_8_links[cet_ucs4_to_iso_8859_8_ct] = -{ - {0x00d7, 0xaa} /* sign */, - {0x00f7, 0xba} /* sign */, - {0x05d0, 0xe0} /* letter alef */, - {0x05d1, 0xe1} /* letter bet */, - {0x05d2, 0xe2} /* letter gimel */, - {0x05d3, 0xe3} /* letter dalet */, - {0x05d4, 0xe4} /* letter he */, - {0x05d5, 0xe5} /* letter vav */, - {0x05d6, 0xe6} /* letter zayin */, - {0x05d7, 0xe7} /* letter het */, - {0x05d8, 0xe8} /* letter tet */, - {0x05d9, 0xe9} /* letter yod */, - {0x05da, 0xea} /* letter final kaf */, - {0x05db, 0xeb} /* letter kaf */, - {0x05dc, 0xec} /* letter lamed */, - {0x05dd, 0xed} /* letter final mem */, - {0x05de, 0xee} /* letter mem */, - {0x05df, 0xef} /* letter final nun */, - {0x05e0, 0xf0} /* letter nun */, - {0x05e1, 0xf1} /* letter samekh */, - {0x05e2, 0xf2} /* letter ayin */, - {0x05e3, 0xf3} /* letter final pe */, - {0x05e4, 0xf4} /* letter pe */, - {0x05e5, 0xf5} /* 
letter final tsadi */, - {0x05e6, 0xf6} /* letter tsadi */, - {0x05e7, 0xf7} /* letter qof */, - {0x05e8, 0xf8} /* letter resh */, - {0x05e9, 0xf9} /* letter shin */, - {0x05ea, 0xfa} /* letter tav */, - {0x2017, 0xdf} /* low line */, - {0x203e, 0xaf} /* overline */ -}; - -/* -#define cet_ucs4_to_iso_8859_8_extra_ct 0 -const cet_ucs4_link_t cet_ucs4_to_iso_8859_8_extra[cet_ucs4_to_iso_8859_8_extra_ct] = {}; -*/ - -cet_cs_vec_t cet_cs_vec_iso_8859_8 = /* defined in cet.h */ -{ - cet_cs_name_iso_8859_8, /* name of character set */ - cet_cs_alias_iso_8859_8, /* alias table */ - - nullptr, /* ... to UCS-4 converter (multi-byte) */ - nullptr, /* UCS-4 to ... converter (multi-byte) */ - - cet_ucs4_map_iso_8859_8, /* char to UCS-4 value table */ - cet_ucs4_ofs_iso_8859_8, /* first non standard character */ - cet_ucs4_cnt_iso_8859_8, /* number of values in table */ - - cet_ucs4_to_iso_8859_8_links, /* UCS-4 to char links */ - cet_ucs4_to_iso_8859_8_ct, /* number of links */ - - nullptr, /* hand made UCS-4 links */ - 0, /* number of extra links */ - - nullptr /* for internal use */ -}; - - -/* -const int iso_8859_8_ucs4_full_map[] = -{ - 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, - 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, - 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, - 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, - 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, - 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, - 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, - 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, - 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, - 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, - 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, - 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, - 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 
0x0067, - 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, - 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, - 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, - 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, - 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, - 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, - 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, - 0x00a0, -1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, - 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x203e, - 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, - 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 0x2017, - 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7, - 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df, - 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7, - 0x05e8, 0x05e9, 0x05ea, -1, -1, -1, -1, -1 -}; -*/ - -#endif diff --git a/cet_util.cc b/cet_util.cc index d160ec2c0..9aab53821 100644 --- a/cet_util.cc +++ b/cet_util.cc @@ -19,288 +19,34 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ -#include // for printf, snprintf -#include // for qsort - #include // for QByteArray #include // for QDebug #include // for QTextCodec #include "defs.h" -#include "cet.h" // for cet_cs_vec_t, cet_str_any_to_utf8, cet_str_utf8_to_any, cet_str_any_to_uni #include "cet_util.h" #include "src/core/logging.h" // for Fatal #define MYNAME "cet_util" -static cet_cs_vec_t* cet_cs_vec_root = nullptr; - -struct cet_cs_alias_t { - char* name; - cet_cs_vec_t* vec; -}; - -static cet_cs_alias_t* cet_cs_alias; -static int cet_cs_alias_ct = 0; -static int cet_cs_vec_ct = 0; - -/* %%% fixed inbuild character sets %%% */ - -#include "cet/ansi_x3_4_1968.h" // for cet_cs_vec_ansi_x3_4_1968, cet_cs_name_ansi_x3_4_1968 -#include "cet/cp1252.h" // for cet_cs_name_cp1252, cet_cs_vec_cp1252 -#include "cet/iso_8859_8.h" // for cet_cs_name_iso_8859_8, cet_cs_vec_iso_8859_8 - - -/* %%% short hand strings transmission for main character sets %%% */ - -short* -cet_str_utf8_to_uni(const char* src, int* length) -{ - return cet_str_any_to_uni(src, &cet_cs_vec_utf8, length); -} - -/* %%% cet_str_any_to_any %%% - * - * -->> for use in mkshort */ - -char* -cet_str_any_to_any(const char* src, const cet_cs_vec_t* src_vec, const cet_cs_vec_t* dest_vec) -{ - const cet_cs_vec_t* v_in = (src_vec != nullptr) ? src_vec : &cet_cs_vec_ansi_x3_4_1968; - const cet_cs_vec_t* v_out = (dest_vec != nullptr) ? dest_vec : &cet_cs_vec_ansi_x3_4_1968; - - if (src == nullptr) { - return nullptr; - } else if ((*src == '\0') || (v_in == v_out)) { - return xstrdup(src); - } - - char* c0 = (v_in == &cet_cs_vec_utf8) ? xstrdup(src) : cet_str_any_to_utf8(src, v_in); - char* c1 = (v_out == &cet_cs_vec_utf8) ? 
xstrdup(c0) : cet_str_utf8_to_any(c0, v_out); - xfree(c0); - - return c1; -} - -static signed int -cet_cs_alias_qsort_cb(const void* a, const void* b) -{ - const auto* va = (const cet_cs_alias_t*) a; - const auto* vb = (const cet_cs_alias_t*) b; - return case_ignore_strcmp(va->name, vb->name); -} - -void -cet_register_cs(cet_cs_vec_t* vec) -{ - if (vec->next == nullptr) { - vec->next = cet_cs_vec_root; - cet_cs_vec_root = vec; - cet_cs_vec_ct++; - } -} - -/* Dummy vector for our native character set */ - -const char* cet_cs_utf8_alias[] = { - "utf8", nullptr -}; - -cet_cs_vec_t cet_cs_vec_utf8 = { - CET_CHARSET_UTF8, - cet_cs_utf8_alias, - nullptr, - nullptr, - nullptr, - 0, - 0, - nullptr, - 0, - nullptr, - 0, - nullptr, -}; - -void -cet_register() -{ - int i, c; - - if (cet_cs_vec_root != nullptr) { - return; - } - - cet_cs_vec_ct = 0; - cet_register_cs(&cet_cs_vec_utf8); /* internal place holder */ - // Alias for "US-ASCII". -#ifdef cet_cs_name_ansi_x3_4_1968 - cet_register_cs(&cet_cs_vec_ansi_x3_4_1968); -#endif -#ifdef cet_cs_name_cp1252 - cet_register_cs(&cet_cs_vec_cp1252); -#endif -#ifdef cet_cs_name_iso_8859_8 - cet_register_cs(&cet_cs_vec_iso_8859_8); -#endif - - - if (cet_cs_vec_ct > 0) { - cet_cs_vec_t* p; - c = 0; - - /* enumerate count of all names and aliases */ - - for (p = cet_cs_vec_root; p != nullptr; p = p->next) { - c++; - if (p->alias != nullptr) { - const char** a = p->alias; - while ((*a) != nullptr) { - a++; - c++; - } - } - } - /* create name to vec table */ - - cet_cs_alias_t* list = (cet_cs_alias_t*) xcalloc(c, sizeof(*list)); - i = 0; - for (p = cet_cs_vec_root; p != nullptr; p = p->next) { - if (p->alias != nullptr) { - const char** a = p->alias; - - list[i].name = xstrdup(p->name); - list[i].vec = p; - i++; - while (*a != nullptr) { - list[i].name = xstrdup(*a); - list[i].vec = p; - i++; - a++; - } - } - } - qsort(list, c, sizeof(*list), cet_cs_alias_qsort_cb); - cet_cs_alias = list; - cet_cs_alias_ct = c; - - /* install fallback for 
ascii-like (first 128 ch.) character sets */ - for (i = 1250; i <= 1258; i++) { - char name[16]; - cet_cs_vec_t* vec; - - snprintf(name, sizeof(name), "WIN-CP%d", i); - if ((vec = cet_find_cs_by_name(name))) { - vec->fallback = &cet_cs_vec_ansi_x3_4_1968; - } - } - for (i = 1; i <= 15; i++) { - char name[16]; - cet_cs_vec_t* vec; - - snprintf(name, sizeof(name), "ISO-8859-%d", i); - if ((vec = cet_find_cs_by_name(name))) { - vec->fallback = &cet_cs_vec_ansi_x3_4_1968; - } - } - } -#ifdef CET_DEBUG - printf("We have registered %d character sets with %d aliases\n", cet_cs_vec_ct, cet_cs_alias_ct); -#endif -} - -cet_cs_vec_t* -cet_find_cs_by_name(const QString& name) -{ - cet_register(); - - if (cet_cs_alias == nullptr) { - return nullptr; - } - - int i = 0; - int j = cet_cs_alias_ct - 1; - - while (i <= j) { - int a = (i + j) >> 1; - auto n = &cet_cs_alias[a]; - int x = case_ignore_strcmp(name, n->name); - if (x == 0) { - return n->vec; - } else if (x < 0) { - j = a - 1; - } else { - i = a + 1; - } - } - return nullptr; -} - -void -cet_deregister() -{ - int j = cet_cs_alias_ct; - cet_cs_alias_t* p = cet_cs_alias; - - if (p == nullptr) { - return; - } - - cet_cs_alias_ct = 0; - cet_cs_alias = nullptr; - - for (int i = 0; i < j; i++) { - xfree(p[i].name); - } - xfree(p); -} - /* gpsbabel additions */ -int -cet_validate_cs(const QString& cs, cet_cs_vec_t** vec, QString* cs_name) -{ - if (cs.isEmpty()) { /* set default us-ascii */ - *vec = &cet_cs_vec_ansi_x3_4_1968; - *cs_name = CET_CHARSET_ASCII; - return 1; - } - - cet_cs_vec_t* v = cet_find_cs_by_name(cs); - if (v != nullptr) { - // TODO: make v->name into q QString and replace this... 
- char* tmp = xstrdup(v->name); - *cs_name = strupper(tmp); - xfree(tmp); - *vec = v; - return 1; - } else { - cs_name->clear(); - *vec = nullptr; - return 0; - } -} - void cet_convert_deinit() { - global_opts.charset = nullptr; global_opts.codec = nullptr; } void cet_convert_init(const QString& cs_name, const int force) { - if ((force != 0) || (global_opts.charset == nullptr)) { + if ((force != 0) || (global_opts.codec == nullptr)) { cet_convert_deinit(); - if (0 == cet_validate_cs(cs_name, &global_opts.charset, &global_opts.charset_name)) { - Fatal() << "Unsupported character set \"" << cs_name << "."; - } if (cs_name.isEmpty()) { /* set default us-ascii */ global_opts.codec = QTextCodec::codecForName(CET_CHARSET_ASCII); } else { - QByteArray ba = CSTR(cs_name); - global_opts.codec = QTextCodec::codecForName(ba); + global_opts.codec = QTextCodec::codecForName(CSTR(cs_name)); } if (!global_opts.codec) { Fatal() << "Unsupported character set \"" << cs_name << "."; diff --git a/cet_util.h b/cet_util.h index 83fe1f008..4d8c93a50 100644 --- a/cet_util.h +++ b/cet_util.h @@ -22,30 +22,9 @@ #ifndef CET_UTIL_H_INCLUDED_ #define CET_UTIL_H_INCLUDED_ -#if HAVE_CONFIG_H -#include "config.h" -#endif - #include -#include "cet.h" #include "defs.h" -cet_cs_vec_t* cet_find_cs_by_name(const QString& name); -void cet_register(); -void cet_deregister(); - -extern cet_cs_vec_t cet_cs_vec_cp1252; - -extern cet_cs_vec_t cet_cs_vec_ansi_x3_4_1968; - -short* cet_str_utf8_to_uni(const char* src, int* length); - -extern cet_cs_vec_t cet_cs_vec_utf8; - - -/* helpers */ - -[[deprecated]] char* cet_str_any_to_any(const char* src, const cet_cs_vec_t* src_vec, const cet_cs_vec_t* dest_vec); /* gpsbabel extensions */ diff --git a/defs.h b/defs.h index a655fb73b..ea8a6413a 100644 --- a/defs.h +++ b/defs.h @@ -186,8 +186,6 @@ struct global_options { int verbose_status; /* set by GUI wrappers for status */ int smart_icons; int smart_names; - cet_cs_vec_t* charset; - QString charset_name; 
inifile_t* inifile; QTextCodec* codec; }; diff --git a/garmin.cc b/garmin.cc index 669c94496..b53d70630 100644 --- a/garmin.cc +++ b/garmin.cc @@ -288,12 +288,6 @@ rw_init(const QString& fname) } - // If a user has specified a non-default character set, we'll trust - // them to sort our the wreckage of violating the Garmin protocol and - // ship characters to the device in that character set. - if (global_opts.charset != &cet_cs_vec_utf8) { - receiver_charset = xstrdup(global_opts.charset_name); - } if (global_opts.debug_level > 0) { fprintf(stderr, "Waypoint type: %d\n" "Chosen waypoint length %d\n", @@ -304,11 +298,6 @@ rw_init(const QString& fname) } } - // Allow override of sent character set for internationalized GPSes. - if (global_opts.charset != &cet_cs_vec_utf8) { - receiver_charset = xstrdup(global_opts.charset_name); - } - /* * If the user provided a short_length, override the calculated value. */ @@ -334,6 +323,14 @@ rw_init(const QString& fname) setshort_mustupper(mkshort_handle, receiver_must_upper); + /* + * This used to mean something when we used cet, but these days this + * format either use implicit QString conversions (utf8) which is + * likely a bug, or we have hard coded QString::fromLatin1 or CSTRc. + * So all the above detection of receiver_charset is for naught. + * But perhaps we will use an appropriate codec based on receiver_charset + * someday. 
+ */ if (receiver_charset) { cet_convert_init(receiver_charset, 1); } @@ -520,7 +517,7 @@ track_read() if (trk_head == nullptr || array[i]->ishdr) { trk_head = route_head_alloc(); trk_head->rte_num = trk_num; - trk_head->rte_name = trk_name; + trk_head->rte_name = QString::fromLatin1(trk_name); trk_num++; track_add_head(trk_head); } @@ -599,7 +596,7 @@ route_read() rte_head = route_head_alloc(); route_add_head(rte_head); if (csrc) { - rte_head->rte_name = csrc; + rte_head->rte_name = QString::fromLatin1(csrc); } } else { if (array[i]->islink) { diff --git a/main.cc b/main.cc index 390ecc711..c193d92df 100644 --- a/main.cc +++ b/main.cc @@ -43,7 +43,7 @@ #endif #include "defs.h" -#include "cet_util.h" // for cet_convert_init, cet_convert_deinit, cet_deregister, cet_register, cet_cs_vec_utf8 +#include "cet_util.h" // for cet_convert_init, cet_convert_deinit #include "csv_util.h" // for csv_linesplit #include "filter.h" // for Filter #include "filter_vecs.h" // for FilterVecs @@ -643,7 +643,6 @@ main(int argc, char* argv[]) global_opts.objective = wptdata; global_opts.masked_objective = NOTHINGMASK; /* this makes the default mask behaviour slightly different */ - global_opts.charset_name.clear(); global_opts.inifile = nullptr; gpsbabel_time = current_time().toTime_t(); /* frozen in testmode */ @@ -654,14 +653,12 @@ main(int argc, char* argv[]) Vecs::Instance().init_vecs(); FilterVecs::Instance().init_filter_vecs(); - cet_register(); session_init(); waypt_init(); route_init(); rc = run(prog_name); - cet_deregister(); waypt_flush_all(); route_deinit(); session_exit(); -- 2.30.2